###########################################################################################################
# Substantive interpretaion: Government formation
###########################################################################################################
# load data
library(readstata13)
source("replication_MS/cbq_function.R")

load("replication_MS/cbq_q1.RData")
q1 <- summary(ms_cbq_stan)
q1 <- q1$summary
q1 <- q1[-14,c(1,4,8)]
load("replication_MS/cbq_q2.RData")
q2 <- summary(ms_cbq_stan)
q2 <- q2$summary
q2 <- q2[-14,c(1,4,8)]
load("replication_MS/cbq_q3.RData")
q3 <- summary(ms_cbq_stan)
q3 <- q3$summary
q3 <- q3[-14,c(1,4,8)]
load("replication_MS/cbq_q4.RData")
q4 <- summary(ms_cbq_stan)
q4 <- q4$summary
q4 <- q4[-14,c(1,4,8)]
load("replication_MS/cbq_q5.RData")
q5 <- summary(ms_cbq_stan)
q5 <- q5$summary
q5 <- q5[-14,c(1,4,8)]
load("replication_MS/cbq_q6.RData")
q6 <- summary(ms_cbq_stan)
q6 <- q6$summary
q6 <- q6[-14,c(1,4,8)]
load("replication_MS/cbq_q7.RData")
q7 <- summary(ms_cbq_stan)
q7 <- q7$summary
q7 <- q7[-14,c(1,4,8)]
load("replication_MS/cbq_q8.RData")
q8 <- summary(ms_cbq_stan)
q8 <- q8$summary
q8 <- q8[-14,c(1,4,8)]
load("replication_MS/cbq_q9.RData")
q9 <- summary(ms_cbq_stan)
q9 <- q9$summary
q9 <- q9[-14,c(1,4,8)]


q_mean <- cbind(
  q1[,1],
  q2[,1],
  q3[,1],
  q4[,1],
  q5[,1],
  q6[,1],
  q7[,1],
  q8[,1],
  q9[,1])


q_25 <- cbind(
  q1[,2],
  q2[,2],
  q3[,2],
  q4[,2],
  q5[,2],
  q6[,2],
  q7[,2],
  q8[,2],
  q9[,2]
)


q_975 <- cbind(
  q1[,3],
  q2[,3],
  q3[,3],
  q4[,3],
  q5[,3],
  q6[,3],
  q7[,3],
  q8[,3],
  q9[,3]
)
# variable names
var_names <- c(
  "Minority Coalition",
  "Minimal Winning Coalition",
  "Number of Parties in the Coalition",
  "Largest Party in the Coalition",
  "Median Party in the Coalition",
  "Ideological Divisions in the Coalition",
  "Ideological Divisions within Majority Opposition",
  "Previous Prime Minister in the Coalition",
  "Incumbent Coalition",
  "Minority Coalition where Investiture Vote Required",
  "Anti-System Presence in the Coalition",
  "Pre-Electoral Pact associated with the Coalition",
  "Anti-Pact associated with the Coalition"
)

library(rstan)
# read in original data
dat0 <- read.table("replication_MS/formation_new.tab",sep = "\t",header=T)
dat0$gnum <- 1:dim(dat0)[1]
dat <- dat0[order(dat0$case,dat0$realg),]
X <- as.matrix(subset(dat,select = c(minor,
                                    minwin,
                                    numpar,
                                    dompar,
                                    median,
                                    gdiv1,
                                    mgodiv1,
                                    prevpm,
                                    sq,
                                    mginvest,
                                    anmax2,
                                    pro,
                                    anti)))
y <- dat$realg
################################################################
# Prediction accuracy acros quantiles
################################################################

coefs <- q_mean[1:13,]
gnum <- dat$gnum
xb <- X %*% coefs
pred_prob1 <- pald(xb[,1],0.1)
pred_prob2 <- pald(xb[,2],0.2)
pred_prob3 <- pald(xb[,3],0.3)
pred_prob4 <- pald(xb[,4],0.4)
pred_prob5 <- pald(xb[,5],0.5)
pred_prob6 <- pald(xb[,6],0.6)
pred_prob7 <- pald(xb[,7],0.7)
pred_prob8 <- pald(xb[,8],0.8)
pred_prob9 <- pald(xb[,9],0.9)
tmp <- cbind(gnum,pred_prob1,pred_prob2,pred_prob3,pred_prob4,pred_prob5,pred_prob6,pred_prob7,
            pred_prob8,pred_prob9,y)
tmp_y <- tmp[which(tmp[,11]==1),]

tmp_y <- as.data.frame(tmp_y)
tmp_y$which_max <- NA
for (i in 1:220){
  tmp_y$which_max[i] <- which.max(tmp_y[i,2:10])
}

# read in GGG
prob_diff_GGG <- read.dta13("replication_MS/prob_diff_GGG.dta")
diff <- subset(prob_diff_GGG,select = c(gnum,clprobm1,mixprobm1))
tmp_y <- merge(tmp_y,diff,by="gnum",all.x=T)
tmp_y_order <- tmp_y[order(tmp_y$which_max,tmp_y$pred_prob1),]

########################################################################################################################
# Prediction accuracy
# Predicted probabilities of 220 observations (CBQ and CL and MXL)

real_y <- dat$realg
dat$ind <- as.integer(as.factor(dat$case))
dattmp <- subset(dat,select = c(gnum, ind, realg))
dattmp$pred1 <- pred_prob1
dattmp$pred2 <- pred_prob2
dattmp$pred3 <- pred_prob3
dattmp$pred4 <- pred_prob4
dattmp$pred5 <- pred_prob5
dattmp$pred6 <- pred_prob6
dattmp$pred7 <- pred_prob7
dattmp$pred8 <- pred_prob8
dattmp$pred9 <- pred_prob9
dattmp <- merge(dattmp,diff,by="gnum",all.x=T)
dattmp <- dattmp[order(dattmp$ind),]


#####################
# CL
cl_pred <- NA
for (i in 1:220){
  subsettmp <- subset(dattmp$clprobm1, dattmp$ind==i)
  predd <- rep(0,length(subsettmp))
  predd[which.max(subsettmp)] <- 1
  cl_pred <- c(cl_pred,predd)
}
cl_pred <- cl_pred[-1]
cl_pred_rate_all <- length(which(cl_pred==dattmp$realg))/33256 # 99.2
cl_pred_obs <- cl_pred[which(dattmp$realg==1)] 
cl_pred_rate_obs <- length(which(cl_pred_obs==1))/220 # 40.5%

#####################
# MXL
mxl_pred <- NA
for (i in 1:220){
  subsettmp <- subset(dattmp$mixprobm1, dattmp$ind==i)
  predd <- rep(0,length(subsettmp))
  predd[which.max(subsettmp)] <- 1
  mxl_pred <- c(mxl_pred,predd)
}
mxl_pred <- mxl_pred[-1]
mxl_pred_rate_all <- length(which(mxl_pred==dattmp$realg))/33256 # 99.2
mxl_pred_obs <- mxl_pred[which(dattmp$realg==1)] 
mxl_pred_rate_obs <- length(which(mxl_pred_obs==1))/220 # 40%

#####################
# CBQ-Q1
cbq1_pred <- NA
for (i in 1:220){
  subsettmp <- subset(dattmp$pred1, dattmp$ind==i)
  predd <- rep(0,length(subsettmp))
  predd[which.max(subsettmp)] <- 1
  cbq1_pred <- c(cbq1_pred,predd)
}
cbq1_pred <- cbq1_pred[-1]
cbq1_pred_rate_all <- length(which(cbq1_pred==dattmp$realg))/33256 # 99.2
cbq1_pred_obs <- cbq1_pred[which(dattmp$realg==1)] 
cbq1_pred_rate_obs <- length(which(cbq1_pred_obs==1))/220 # 42.3%


#####################
# CBQ-Q2
cbq2_pred <- NA
for (i in 1:220){
  subsettmp <- subset(dattmp$pred2, dattmp$ind==i)
  predd <- rep(0,length(subsettmp))
  predd[which.max(subsettmp)] <- 1
  cbq2_pred <- c(cbq2_pred,predd)
}
cbq2_pred <- cbq2_pred[-1]
cbq2_pred_rate_all <- length(which(cbq2_pred==dattmp$realg))/33256 # 99.2
cbq2_pred_obs <- cbq2_pred[which(dattmp$realg==1)] 
cbq2_pred_rate_obs <- length(which(cbq2_pred_obs==1))/220 # 39.5%


#####################
# CBQ-Q3
cbq3_pred <- NA
for (i in 1:220){
  subsettmp <- subset(dattmp$pred3, dattmp$ind==i)
  predd <- rep(0,length(subsettmp))
  predd[which.max(subsettmp)] <- 1
  cbq3_pred <- c(cbq3_pred,predd)
}
cbq3_pred <- cbq3_pred[-1]
cbq3_pred_rate_all <- length(which(cbq3_pred==dattmp$realg))/33256 # 99.2
cbq3_pred_obs <- cbq3_pred[which(dattmp$realg==1)] 
cbq3_pred_rate_obs <- length(which(cbq3_pred_obs==1))/220 # 38.2%


#####################
# CBQ-Q4
cbq4_pred <- NA
for (i in 1:220){
  subsettmp <- subset(dattmp$pred4, dattmp$ind==i)
  predd <- rep(0,length(subsettmp))
  predd[which.max(subsettmp)] <- 1
  cbq4_pred <- c(cbq4_pred,predd)
}
cbq4_pred <- cbq4_pred[-1]
cbq4_pred_rate_all <- length(which(cbq4_pred==dattmp$realg))/33256 # 99.2
cbq4_pred_obs <- cbq4_pred[which(dattmp$realg==1)] 
cbq4_pred_rate_obs <- length(which(cbq4_pred_obs==1))/220 # 35.9%


#####################
# CBQ-Q5
cbq5_pred <- NA
for (i in 1:220){
  subsettmp <- subset(dattmp$pred5, dattmp$ind==i)
  predd <- rep(0,length(subsettmp))
  predd[which.max(subsettmp)] <- 1
  cbq5_pred <- c(cbq5_pred,predd)
}
cbq5_pred <- cbq5_pred[-1]
cbq5_pred_rate_all <- length(which(cbq5_pred==dattmp$realg))/33256 # 99.2
cbq5_pred_obs <- cbq5_pred[which(dattmp$realg==1)] 
cbq5_pred_rate_obs <- length(which(cbq5_pred_obs==1))/220 # 37.3%


#####################
# CBQ-Q6
cbq6_pred <- NA
for (i in 1:220){
  subsettmp <- subset(dattmp$pred6, dattmp$ind==i)
  predd <- rep(0,length(subsettmp))
  predd[which.max(subsettmp)] <- 1
  cbq6_pred <- c(cbq6_pred,predd)
}
cbq6_pred <- cbq6_pred[-1]
cbq6_pred_rate_all <- length(which(cbq6_pred==dattmp$realg))/33256 # 99.2
cbq6_pred_obs <- cbq6_pred[which(dattmp$realg==1)] 
cbq6_pred_rate_obs <- length(which(cbq6_pred_obs==1))/220 # 37.3%

#####################
# CBQ-Q7
cbq7_pred <- NA
for (i in 1:220){
  subsettmp <- subset(dattmp$pred7, dattmp$ind==i)
  predd <- rep(0,length(subsettmp))
  predd[which.max(subsettmp)] <- 1
  cbq7_pred <- c(cbq7_pred,predd)
}
cbq7_pred <- cbq7_pred[-1]
cbq7_pred_rate_all <- length(which(cbq7_pred==dattmp$realg))/33256 # 99.2
cbq7_pred_obs <- cbq7_pred[which(dattmp$realg==1)] 
cbq7_pred_rate_obs <- length(which(cbq7_pred_obs==1))/220 # 38.6%

#####################
# CBQ-Q8
cbq8_pred <- NA
for (i in 1:220){
  subsettmp <- subset(dattmp$pred8, dattmp$ind==i)
  predd <- rep(0,length(subsettmp))
  predd[which.max(subsettmp)] <- 1
  cbq8_pred <- c(cbq8_pred,predd)
}
cbq8_pred <- cbq8_pred[-1]
cbq8_pred_rate_all <- length(which(cbq8_pred==dattmp$realg))/33256 # 99.1
cbq8_pred_obs <- cbq8_pred[which(dattmp$realg==1)] 
cbq8_pred_rate_obs <- length(which(cbq8_pred_obs==1))/220 # 34.5%

#####################
# CBQ-Q9
cbq9_pred <- NA
for (i in 1:220){
  subsettmp <- subset(dattmp$pred9, dattmp$ind==i)
  predd <- rep(0,length(subsettmp))
  predd[which.max(subsettmp)] <- 1
  cbq9_pred <- c(cbq9_pred,predd)
}
cbq9_pred <- cbq9_pred[-1]
cbq9_pred_rate_all <- length(which(cbq9_pred==dattmp$realg))/33256 # 99.2
cbq9_pred_obs <- cbq9_pred[which(dattmp$realg==1)] 
cbq9_pred_rate_obs <- length(which(cbq9_pred_obs==1))/220 # 40.9%


################################################################################################
# Counterfactuals
###############################################################################################


dat0 <- read.table("replication_MS/formation_new.tab",sep = "\t",header=T)
dat0$gnum <- 1:dim(dat0)[1]
counterfactual <- read.dta13("replication_MS/counterfactual.dta")
counterfactual <- subset(counterfactual,select = c(gnum,minwin2,mginvest2,minor2,dompar2))
dat0 <- merge(dat0,counterfactual,by="gnum",all.x=T)

dat <- dat0[order(dat0$case,dat0$realg),]

X <- as.matrix(subset(dat,select = c(minor,
                                    minwin,
                                    numpar,
                                    dompar,
                                    median,
                                    gdiv1,
                                    mgodiv1,
                                    prevpm,
                                    sq,
                                    mginvest,
                                    anmax2,
                                    pro,
                                    anti)))

X2 <- as.matrix(subset(dat,select = c(minor2,
                                    minwin2,
                                    numpar,
                                    dompar2,
                                    median,
                                    gdiv1,
                                    mgodiv1,
                                    prevpm,
                                    sq,
                                    mginvest2,
                                    anmax2,
                                    pro,
                                    anti)))


##############

xb1 <- X %*% coefs
xb2 <- X2 %*% coefs


pred1_prob1 <- pald(xb1[,1],0.1)
pred1_prob2 <- pald(xb1[,2],0.2)
pred1_prob3 <- pald(xb1[,3],0.3)
pred1_prob4 <- pald(xb1[,4],0.4)
pred1_prob5 <- pald(xb1[,5],0.5)
pred1_prob6 <- pald(xb1[,6],0.6)
pred1_prob7 <- pald(xb1[,7],0.7)
pred1_prob8 <- pald(xb1[,8],0.8)
pred1_prob9 <- pald(xb1[,9],0.9)

pred2_prob1 <- pald(xb2[,1],0.1)
pred2_prob2 <- pald(xb2[,2],0.2)
pred2_prob3 <- pald(xb2[,3],0.3)
pred2_prob4 <- pald(xb2[,4],0.4)
pred2_prob5 <- pald(xb2[,5],0.5)
pred2_prob6 <- pald(xb2[,6],0.6)
pred2_prob7 <- pald(xb2[,7],0.7)
pred2_prob8 <- pald(xb2[,8],0.8)
pred2_prob9 <- pald(xb2[,9],0.9)

tmp1 <- cbind(gnum,
             pred1_prob1,
             pred1_prob2,
             pred1_prob3,
             pred1_prob4,
             pred1_prob5,
             pred1_prob6,
             pred1_prob7,
             pred1_prob8,
             pred1_prob9,
             y)

tmp2 <- cbind(gnum,
             pred2_prob1,
             pred2_prob2,
             pred2_prob3,
             pred2_prob4,
             pred2_prob5,
             pred2_prob6,
             pred2_prob7,
             pred2_prob8,
             pred2_prob9,
             y)


prob_diff1 <- pred2_prob1 - pred1_prob1
prob_diff2 <- pred2_prob2 - pred1_prob2
prob_diff3 <- pred2_prob3 - pred1_prob3
prob_diff4 <- pred2_prob4 - pred1_prob4
prob_diff5 <- pred2_prob5 - pred1_prob5
prob_diff6 <- pred2_prob6 - pred1_prob6
prob_diff7 <- pred2_prob7 - pred1_prob7
prob_diff8 <- pred2_prob8 - pred1_prob8
prob_diff9 <- pred2_prob9 - pred1_prob9

##################################
# combine predicted probabilities
##################################
pred_probs <- as.data.frame(cbind(gnum,
                                 dat$realg,
                                 pred1_prob1,
                                 pred1_prob2,
                                 pred1_prob3,
                                 pred1_prob4,
                                 pred1_prob5,
                                 pred1_prob6,
                                 pred1_prob7,
                                 pred1_prob8,
                                 pred1_prob9,
                                 
                                 pred2_prob1,
                                 pred2_prob2,
                                 pred2_prob3,
                                 pred2_prob4,
                                 pred2_prob5,
                                 pred2_prob6,
                                 pred2_prob7,
                                 pred2_prob8,
                                 pred2_prob9,
                                 
                                 prob_diff1,
                                 prob_diff2,
                                 prob_diff3,
                                 prob_diff4,
                                 prob_diff5,
                                 prob_diff6,
                                 prob_diff7,
                                 prob_diff8,
                                 prob_diff9
                                 ))
pred_probs <- merge(pred_probs,prob_diff_GGG,by="gnum",all.x=T)
pred_probs$cldiff <- pred_probs$clprobm2 - pred_probs$clprobm1
pred_probs$mxldiff <- pred_probs$mixprobm2 - pred_probs$mixprobm1
# 220 observed governments
preds <- subset(pred_probs,pred_probs$V2==1)
preds$which_max <- NA
for (i in 1:220){
  preds$which_max[i] <- which.max(preds[i,3:11])
}



###############################################
# density plot of predicted probability changes
###############################################
tmp2 <- subset(preds,select = c(gnum,prob_diff1))
tmp3 <- subset(preds,select = c(gnum,prob_diff9))
tmp4 <- subset(preds,select = c(gnum,cldiff))
tmp5 <- subset(preds,select = c(gnum,mxldiff))
names(tmp2)<-"probdiff"
names(tmp3)<-"probdiff"
names(tmp4)<-"probdiff"
names(tmp5)<-"probdiff"
tmp2$cat <- "Q1"
tmp3$cat <- "Q9"
tmp4$cat <- "CL"
tmp5$cat <- "MXL"

tmpdat <- rbind(tmp2,
               tmp3,
               tmp4,
               tmp5)
names(tmpdat) <- c("gnum","probdiff","cat")

# Density plot of changes in probability
# Color by groups
ggplot(tmpdat, aes(x=probdiff)) + 
  geom_density(alpha=.5) +
  theme_bw()+
  geom_vline(xintercept=0, lty=2, lwd=0.6, colour="grey50")+
  xlab("Change in predicted probability")+
  ylab("Density")+
  facet_wrap(~cat,nrow=5)+
  scale_fill_discrete(name = "Models")
ggsave("figures/figure6.pdf")



########################################################################################################
# Matrix of scatter plot (only 220 observed government)
# full sample in appendix
# standard deviation of CBQs
cbqsd1 <- (quantile(preds$prob_diff1,probs=0.975) - quantile(preds$prob_diff1,probs=0.025))/(2*1.96)
cbqsd2 <- (quantile(preds$prob_diff2,probs=0.975) - quantile(preds$prob_diff2,probs=0.025))/(2*1.96)
cbqsd3 <- (quantile(preds$prob_diff3,probs=0.975) - quantile(preds$prob_diff3,probs=0.025))/(2*1.96)
cbqsd4 <- (quantile(preds$prob_diff4,probs=0.975) - quantile(preds$prob_diff4,probs=0.025))/(2*1.96)
cbqsd5 <- (quantile(preds$prob_diff5,probs=0.975) - quantile(preds$prob_diff5,probs=0.025))/(2*1.96)
cbqsd6 <- (quantile(preds$prob_diff6,probs=0.975) - quantile(preds$prob_diff6,probs=0.025))/(2*1.96)
cbqsd7 <- (quantile(preds$prob_diff7,probs=0.975) - quantile(preds$prob_diff7,probs=0.025))/(2*1.96)
cbqsd8 <- (quantile(preds$prob_diff8,probs=0.975) - quantile(preds$prob_diff8,probs=0.025))/(2*1.96)
cbqsd9 <- (quantile(preds$prob_diff9,probs=0.975) - quantile(preds$prob_diff9,probs=0.025))/(2*1.96)
# joint standard deviation: CBQ + CL
diffsig_cbq_cl1 <- sqrt(cbqsd1^2 + preds$clsd^2)
diffsig_cbq_cl2 <- sqrt(cbqsd2^2 + preds$clsd^2)
diffsig_cbq_cl3 <- sqrt(cbqsd3^2 + preds$clsd^2)
diffsig_cbq_cl4 <- sqrt(cbqsd4^2 + preds$clsd^2)
diffsig_cbq_cl5 <- sqrt(cbqsd5^2 + preds$clsd^2)
diffsig_cbq_cl6 <- sqrt(cbqsd6^2 + preds$clsd^2)
diffsig_cbq_cl7 <- sqrt(cbqsd7^2 + preds$clsd^2)
diffsig_cbq_cl8 <- sqrt(cbqsd8^2 + preds$clsd^2)
diffsig_cbq_cl9 <- sqrt(cbqsd9^2 + preds$clsd^2)
# joint standard deviation: CBQ + MXL
diffsig_cbq_mxl1 <- sqrt(cbqsd1^2 + preds$mxlsd^2)
diffsig_cbq_mxl2 <- sqrt(cbqsd2^2 + preds$mxlsd^2)
diffsig_cbq_mxl3 <- sqrt(cbqsd3^2 + preds$mxlsd^2)
diffsig_cbq_mxl4 <- sqrt(cbqsd4^2 + preds$mxlsd^2)
diffsig_cbq_mxl5 <- sqrt(cbqsd5^2 + preds$mxlsd^2)
diffsig_cbq_mxl6 <- sqrt(cbqsd6^2 + preds$mxlsd^2)
diffsig_cbq_mxl7 <- sqrt(cbqsd7^2 + preds$mxlsd^2)
diffsig_cbq_mxl8 <- sqrt(cbqsd8^2 + preds$mxlsd^2)
diffsig_cbq_mxl9 <- sqrt(cbqsd9^2 + preds$mxlsd^2)
# difference between change in predicted probabilities: CBQ vs. CL
diffdiff_cbq_cl1 <- preds$prob_diff1 - preds$cldiff
diffdiff_cbq_cl2 <- preds$prob_diff2 - preds$cldiff
diffdiff_cbq_cl3 <- preds$prob_diff3 - preds$cldiff
diffdiff_cbq_cl4 <- preds$prob_diff4 - preds$cldiff
diffdiff_cbq_cl5 <- preds$prob_diff5 - preds$cldiff
diffdiff_cbq_cl6 <- preds$prob_diff6 - preds$cldiff
diffdiff_cbq_cl7 <- preds$prob_diff7 - preds$cldiff
diffdiff_cbq_cl8 <- preds$prob_diff8 - preds$cldiff
diffdiff_cbq_cl9 <- preds$prob_diff9 - preds$cldiff
# difference between change in predicted probabilities: CBQ vs. MXL
diffdiff_cbq_mxl1 <- preds$prob_diff1 - preds$mxldiff
diffdiff_cbq_mxl2 <- preds$prob_diff2 - preds$mxldiff
diffdiff_cbq_mxl3 <- preds$prob_diff3 - preds$mxldiff
diffdiff_cbq_mxl4 <- preds$prob_diff4 - preds$mxldiff
diffdiff_cbq_mxl5 <- preds$prob_diff5 - preds$mxldiff
diffdiff_cbq_mxl6 <- preds$prob_diff6 - preds$mxldiff
diffdiff_cbq_mxl7 <- preds$prob_diff7 - preds$mxldiff
diffdiff_cbq_mxl8 <- preds$prob_diff8 - preds$mxldiff
diffdiff_cbq_mxl9 <- preds$prob_diff9 - preds$mxldiff
# Significantly different predictions: CBQ vs. CL
sigdiff_cbq_cl1 <- ifelse(abs(diffdiff_cbq_cl1/diffsig_cbq_cl1)>1.96,1,0)
sigdiff_cbq_cl2 <- ifelse(abs(diffdiff_cbq_cl2/diffsig_cbq_cl2)>1.96,1,0)
sigdiff_cbq_cl3 <- ifelse(abs(diffdiff_cbq_cl3/diffsig_cbq_cl3)>1.96,1,0)
sigdiff_cbq_cl4 <- ifelse(abs(diffdiff_cbq_cl4/diffsig_cbq_cl4)>1.96,1,0)
sigdiff_cbq_cl5 <- ifelse(abs(diffdiff_cbq_cl5/diffsig_cbq_cl5)>1.96,1,0)
sigdiff_cbq_cl6 <- ifelse(abs(diffdiff_cbq_cl6/diffsig_cbq_cl6)>1.96,1,0)
sigdiff_cbq_cl7 <- ifelse(abs(diffdiff_cbq_cl7/diffsig_cbq_cl7)>1.96,1,0)
sigdiff_cbq_cl8 <- ifelse(abs(diffdiff_cbq_cl8/diffsig_cbq_cl8)>1.96,1,0)
sigdiff_cbq_cl9 <- ifelse(abs(diffdiff_cbq_cl9/diffsig_cbq_cl9)>1.96,1,0)
# Significantly different predictions: CBQ vs. MXL
sigdiff_cbq_mxl1 <- ifelse(abs(diffdiff_cbq_mxl1/diffsig_cbq_mxl1)>1.96,1,0)
sigdiff_cbq_mxl2 <- ifelse(abs(diffdiff_cbq_mxl2/diffsig_cbq_mxl2)>1.96,1,0)
sigdiff_cbq_mxl3 <- ifelse(abs(diffdiff_cbq_mxl3/diffsig_cbq_mxl3)>1.96,1,0)
sigdiff_cbq_mxl4 <- ifelse(abs(diffdiff_cbq_mxl4/diffsig_cbq_mxl4)>1.96,1,0)
sigdiff_cbq_mxl5 <- ifelse(abs(diffdiff_cbq_mxl5/diffsig_cbq_mxl5)>1.96,1,0)
sigdiff_cbq_mxl6 <- ifelse(abs(diffdiff_cbq_mxl6/diffsig_cbq_mxl6)>1.96,1,0)
sigdiff_cbq_mxl7 <- ifelse(abs(diffdiff_cbq_mxl7/diffsig_cbq_mxl7)>1.96,1,0)
sigdiff_cbq_mxl8 <- ifelse(abs(diffdiff_cbq_mxl8/diffsig_cbq_mxl8)>1.96,1,0)
sigdiff_cbq_mxl9 <- ifelse(abs(diffdiff_cbq_mxl9/diffsig_cbq_mxl9)>1.96,1,0)
# Average different between changes in predicted probabilities: CBQ vs. MXL (only significant cases)
avg_diff_cbq_mxl1 <- mean((preds$prob_diff1 - preds$mxldiff)[which(sigdiff_cbq_mxl1==1)])
avg_diff_cbq_mxl2 <- mean((preds$prob_diff2 - preds$mxldiff)[which(sigdiff_cbq_mxl2==1)])
avg_diff_cbq_mxl3 <- mean((preds$prob_diff3 - preds$mxldiff)[which(sigdiff_cbq_mxl3==1)])
avg_diff_cbq_mxl4 <- mean((preds$prob_diff4 - preds$mxldiff)[which(sigdiff_cbq_mxl4==1)])
avg_diff_cbq_mxl5 <- mean((preds$prob_diff5 - preds$mxldiff)[which(sigdiff_cbq_mxl5==1)])
avg_diff_cbq_mxl6 <- mean((preds$prob_diff6 - preds$mxldiff)[which(sigdiff_cbq_mxl6==1)])
avg_diff_cbq_mxl7 <- mean((preds$prob_diff7 - preds$mxldiff)[which(sigdiff_cbq_mxl7==1)])
avg_diff_cbq_mxl8 <- mean((preds$prob_diff8 - preds$mxldiff)[which(sigdiff_cbq_mxl8==1)])
avg_diff_cbq_mxl9 <- mean((preds$prob_diff9 - preds$mxldiff)[which(sigdiff_cbq_mxl9==1)])
# Average different between changes in predicted probabilities: CBQ vs. CL (only significant cases)
avg_diff_cbq_cl1 <- mean((preds$prob_diff1 - preds$cldiff)[which(sigdiff_cbq_cl1==1)])
avg_diff_cbq_cl2 <- mean((preds$prob_diff2 - preds$cldiff)[which(sigdiff_cbq_cl2==1)])
avg_diff_cbq_cl3 <- mean((preds$prob_diff3 - preds$cldiff)[which(sigdiff_cbq_cl3==1)])
avg_diff_cbq_cl4 <- mean((preds$prob_diff4 - preds$cldiff)[which(sigdiff_cbq_cl4==1)])
avg_diff_cbq_cl5 <- mean((preds$prob_diff5 - preds$cldiff)[which(sigdiff_cbq_cl5==1)])
avg_diff_cbq_cl6 <- mean((preds$prob_diff6 - preds$cldiff)[which(sigdiff_cbq_cl6==1)])
avg_diff_cbq_cl7 <- mean((preds$prob_diff7 - preds$cldiff)[which(sigdiff_cbq_cl7==1)])
avg_diff_cbq_cl8 <- mean((preds$prob_diff8 - preds$cldiff)[which(sigdiff_cbq_cl8==1)])
avg_diff_cbq_cl9 <- mean((preds$prob_diff9 - preds$cldiff)[which(sigdiff_cbq_cl9==1)])



##########################################################################################################
# 2 * 2 plot panels:  220 obs
##########################################################################################################
pdf("figures/figure5.pdf",width=8,height=8)
par(mfrow=c(2,2))
plot(NA,
     xlim=c(-1,1),
     ylim=c(-1,1),
     asp=1,
     xlab = "Change in Conditional Logit Probability",
     ylab = "Change in CBQ Probability (Q1)",
     bty="l")
points(preds$cldiff,preds$prob_diff1,col=ifelse(sigdiff_cbq_cl1==1,"black","gray") )
legend("topleft", legend = c("Q1",
                             paste("# sig. diff. predicts =", sum(sigdiff_cbq_cl1) ),
                             paste("Avg. diff. =", round(avg_diff_cbq_cl1,2 ) )
)
)
abline(0,1,lty="dashed")


plot(NA,
     xlim=c(-1,1),
     ylim=c(-1,1),
     asp=1,
     xlab = "Change in Conditional Logit Probability",
     ylab = "Change in CBQ Probability (Q9)",
     bty="l")
points(preds$cldiff,preds$prob_diff9,col=ifelse(sigdiff_cbq_cl9==1,"black","gray") )
legend("topleft", legend = c("Q9",
                             paste("# sig. diff. predicts =", sum(sigdiff_cbq_cl9) ),
                             paste("Avg. diff. =",round(avg_diff_cbq_cl9,2 ) )
)
)
abline(0,1,lty="dashed")

plot(NA,
     xlim=c(-1,1),
     ylim=c(-1,1),
     asp=1,
     xlab = "Change in Mixed Logit Probability",
     ylab = "Change in CBQ Probability (Q1)",
     bty="l")
points(preds$mxldiff,preds$prob_diff1,col=ifelse(sigdiff_cbq_mxl1==1,"black","gray") )
legend("topleft", legend = c("Q1",
                             paste("# sig. diff. predicts =", sum(sigdiff_cbq_mxl1) ),
                             paste("Avg. diff. =",round(avg_diff_cbq_mxl1,2 ) )
)
)
abline(0,1,lty="dashed")

plot(NA,
     xlim=c(-1,1),
     ylim=c(-1,1),
     asp=1,
     xlab = "Change in Mixed Logit Probability",
     ylab = "Change in CBQ Probability (Q9)",
     bty="l")
points(preds$mxldiff,preds$prob_diff9,col=ifelse(sigdiff_cbq_mxl9==1,"black","gray") )
legend("topleft", legend = c("Q9",
                             paste("# sig. diff. predicts =", sum(sigdiff_cbq_mxl9) ),
                             paste("Avg. diff. =",round(avg_diff_cbq_mxl9,2 ) )
)
)
abline(0,1,lty="dashed")

dev.off()


